package com.huilian.test.pdf;

import org.apache.pdfbox.pdmodel.PDDocument;
import org.fit.pdfdom.PDFDomTree;

import javax.xml.parsers.ParserConfigurationException;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Writer;

/**
 * Converts PDF documents to HTML using PDFBox for parsing and
 * {@link PDFDomTree} for rendering the DOM as text.
 */
public class PdfToHtml {
    /** Output file written by {@link #generateHTMLFromPDF(String)}. */
    private static final String DEFAULT_OUTPUT_PATH = "C:\\Users\\luod\\Downloads\\pdf2.html";

    /** Input file used by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_INPUT_PATH =
            "C:\\Users\\luod\\Downloads\\SCC20181011150602339708369.pdf";

    /**
     * Loads the PDF at {@code filename} and writes its HTML rendering to the
     * fixed output path {@code C:\Users\luod\Downloads\pdf2.html} (UTF-8).
     *
     * @param filename path of the PDF file to convert
     * @throws IOException if the PDF cannot be read or the HTML cannot be written
     * @throws ParserConfigurationException if the DOM builder cannot be created
     */
    public void generateHTMLFromPDF(String filename) throws IOException, ParserConfigurationException {
        // The PDF is loaded before the output file is opened, so a missing or
        // unreadable input never truncates an existing output file.
        // try-with-resources closes both resources even when conversion fails.
        try (PDDocument pdf = PDDocument.load(new File(filename));
             PrintWriter output = new PrintWriter(DEFAULT_OUTPUT_PATH, "utf-8")) {
            new PDFDomTree().writeText(pdf, output);
            // PrintWriter never throws on write failures; it only records them.
            // Surface such a failure instead of silently leaving a broken file.
            if (output.checkError()) {
                throw new IOException("Failed to write HTML output to " + DEFAULT_OUTPUT_PATH);
            }
        }
    }

    /**
     * Loads the PDF from {@code input} and writes its HTML rendering to {@code out}.
     * The writer is supplied by the caller and is not closed here.
     *
     * @param input the PDF file to convert
     * @param out   destination for the generated HTML; remains open on return
     * @throws IOException if the PDF cannot be read or the HTML cannot be written
     * @throws ParserConfigurationException if the DOM builder cannot be created
     */
    public void convertPdf2Html(File input, Writer out) throws IOException, ParserConfigurationException {
        // Only the document is owned by this method, so only it is closed.
        try (PDDocument pdf = PDDocument.load(input)) {
            PDFDomTree tree = new PDFDomTree();
            tree.writeText(pdf, out);
        }
    }

    /**
     * Converts a PDF to HTML. The input path is taken from the first
     * command-line argument when present; otherwise the built-in default
     * path is used.
     *
     * @param args optional: {@code args[0]} is the path of the PDF to convert
     * @throws IOException if the PDF cannot be read or the HTML cannot be written
     * @throws ParserConfigurationException if the DOM builder cannot be created
     */
    public static void main(String[] args) throws IOException, ParserConfigurationException {
        String file = (args != null && args.length > 0) ? args[0] : DEFAULT_INPUT_PATH;
        new PdfToHtml().generateHTMLFromPDF(file);
    }
}
